# load data.table for faster operations
library(data.table)
library(tidyverse)
# use fread for fast reading of data csv files
  # ColumnNameCleaner.rmd should be run first
cases <- fread("Data/time_series_covid19_confirmed_US.csv")
vaccineCounty <- fread("Data/Vaccination/COVID-19_Vaccinations_in_the_United_States_County.csv")
#vaccineJurisdiction <- fread("Data/Vaccination/COVID-19_Vaccinations_in_the_United_States_Jurisdiction.csv")
pop <- fread("Data/Population/co-est2019-alldata.csv")
distancing <- fread("Data/Distancing/Unacast_Social_Distancing_Grades.csv")
masking <- fread("Data/Masking/mask-use-by-county.csv")
# preview the data tables
head(cases)
head(vaccineCounty)
#head(vaccineJurisdiction)
head(distancing)
head(masking)
head(pop)
# either create or fix the FIPS column using available data
cases[, FIPS := sprintf("%05d", cases[,FIPS])]
pop[, FIPS := sprintf("%02d%03d", pop[,STATE], pop[,COUNTY])]
# create a population density column too
distancing[, c("FIPS", "popDensity") := .(sprintf("%05d", county_fips), ((county_population / Shape__Area * 100) + 1))]
masking[, FIPS := sprintf("%05d", COUNTYFP)]
masking <- cbind("FIPS" = masking[, FIPS], masking[,!c("FIPS")] + 1)
# drops unneeded columns
cases <- cases[, !c("V1", "UID", "iso2", "iso3", "code3", "CombinedKey")]
# selects the most recent population estimate (2019) and drops what will be redundant columns
years2019 <- grep("^([^0-9]*)$|2019", colnames(pop))
pop <- pop[, ..years2019][, !c("SUMLEV", "STATE", "COUNTY", "STNAME", "CTYNAME")]
# drops what will be redundant columns
distancing <- distancing[, !c("OBJECTID", "state_fips", "state_name", "county_fips", "county_name")]
masking <- masking[, !c("COUNTYFP")]
# Overall total
totalCases <- grep("[0-9]{4}", colnames(cases))
cases[, TotalCases := rowSums(cases[, ..totalCases], na.rm = TRUE)]
Both 'totalCases' and '..totalCases' exist in calling scope. Please remove the '..totalCases' variable in calling scope for clarity.
# Total before vaccinations
# 22 January 2020 - 30 November 2020
# https://www.washingtonpost.com/nation/2020/12/14/first-covid-vaccines-new-york/
totalCases <- totalCases[1:314]
cases[, TotalCasesVaccines := rowSums(cases[, ..totalCases], na.rm = TRUE)]
Both 'totalCases' and '..totalCases' exist in calling scope. Please remove the '..totalCases' variable in calling scope for clarity.
grades <- c("A+"=4.3,"A"=4,"A-"=3.7,"B+"=3.3,"B"=3,"B-"=2.7, "C+"=2.3,"C"=2,"C-"=1.7,"D+"=1.3,"D"=1,"D-"= 0.7, "F"=0)

gfunc <- function(x) {
  grades[as.character(x)]
}

distancing[, c("grade_total", "grade_distance", "grade_visitation", "grade_encounters") :=
             .(gfunc(grade_total), gfunc(grade_distance), gfunc(grade_visitation), gfunc(grade_encounters))]
# set the FIPS column as the key for faster data.table operations
# time series
setkey(cases, FIPS)
setkey(vaccineCounty, FIPS)
setkey(distancing, FIPS)
# not time series
setkey(masking, FIPS)
setkey(pop, FIPS)
# pivot_wider(vaccineCounty, names_from = Date, values_from = )
csv <- cases[pop,][distancing,][masking,]
write.csv(csv, "Data/casesPopDistancingMasking.csv", row.names = FALSE)
openxlsx::write.xlsx(csv, "Data/casesPopDistancingMasking.xlsx", overwrite = TRUE)
dates <- grep("D[0-9]{4}_[0-9]{2}_[0-9]{2}", colnames(csv))
openxlsx::write.xlsx(csv[, !..dates], "Data/noDates.xlsx", overwrite = TRUE)
csv
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CiMgbG9hZCBkYXRhLnRhYmxlIGZvciBmYXN0ZXIgb3BlcmF0aW9ucwpsaWJyYXJ5KGRhdGEudGFibGUpCmxpYnJhcnkodGlkeXZlcnNlKQpgYGAKCgpgYGB7cn0KIyB1c2UgZnJlYWQgZm9yIGZhc3QgcmVhZGluZyBvZiBkYXRhIGNzdiBmaWxlcwogICMgQ29sdW1uTmFtZUNsZWFuZXIucm1kIHNob3VsZCBiZSBydW4gZmlyc3QKY2FzZXMgPC0gZnJlYWQoIkRhdGEvdGltZV9zZXJpZXNfY292aWQxOV9jb25maXJtZWRfVVMuY3N2IikKdmFjY2luZUNvdW50eSA8LSBmcmVhZCgiRGF0YS9WYWNjaW5hdGlvbi9DT1ZJRC0xOV9WYWNjaW5hdGlvbnNfaW5fdGhlX1VuaXRlZF9TdGF0ZXNfQ291bnR5LmNzdiIpCiN2YWNjaW5lSnVyaXNkaWN0aW9uIDwtIGZyZWFkKCJEYXRhL1ZhY2NpbmF0aW9uL0NPVklELTE5X1ZhY2NpbmF0aW9uc19pbl90aGVfVW5pdGVkX1N0YXRlc19KdXJpc2RpY3Rpb24uY3N2IikKcG9wIDwtIGZyZWFkKCJEYXRhL1BvcHVsYXRpb24vY28tZXN0MjAxOS1hbGxkYXRhLmNzdiIpCmRpc3RhbmNpbmcgPC0gZnJlYWQoIkRhdGEvRGlzdGFuY2luZy9VbmFjYXN0X1NvY2lhbF9EaXN0YW5jaW5nX0dyYWRlcy5jc3YiKQptYXNraW5nIDwtIGZyZWFkKCJEYXRhL01hc2tpbmcvbWFzay11c2UtYnktY291bnR5LmNzdiIpCmBgYAoKCmBgYHtyfQojIHByZXZpZXcgdGhlIGRhdGEgdGFibGVzCmhlYWQoY2FzZXMpCmhlYWQodmFjY2luZUNvdW50eSkKI2hlYWQodmFjY2luZUp1cmlzZGljdGlvbikKaGVhZChkaXN0YW5jaW5nKQpoZWFkKG1hc2tpbmcpCmhlYWQocG9wKQpgYGAKCgpgYGB7cn0KIyBlaXRoZXIgY3JlYXRlIG9yIGZpeCB0aGUgRklQUyBjb2x1bW4gdXNpbmcgYXZhaWxhYmxlIGRhdGEKY2FzZXNbLCBGSVBTIDo9IHNwcmludGYoIiUwNWQiLCBjYXNlc1ssRklQU10pXQpwb3BbLCBGSVBTIDo9IHNwcmludGYoIiUwMmQlMDNkIiwgcG9wWyxTVEFURV0sIHBvcFssQ09VTlRZXSldCiMgY3JlYXRlIGEgcG9wdWxhdGlvbiBkZW5zaXR5IGNvbHVtbiB0b28KZGlzdGFuY2luZ1ssIGMoIkZJUFMiLCAicG9wRGVuc2l0eSIpIDo9IC4oc3ByaW50ZigiJTA1ZCIsIGNvdW50eV9maXBzKSwgKChjb3VudHlfcG9wdWxhdGlvbiAvIFNoYXBlX19BcmVhICogMTAwKSArIDEpKV0KbWFza2luZ1ssIEZJUFMgOj0gc3ByaW50ZigiJTA1ZCIsIENPVU5UWUZQKV0KbWFza2luZyA8LSBjYmluZCgiRklQUyIgPSBtYXNraW5nWywgRklQU10sIG1hc2tpbmdbLCFjKCJGSVBTIildICsgMSkKYGBgCgoKYGBge3J9CiMgZHJvcHMgdW5uZWVkZWQgY29sdW1ucwpjYXNlcyA8LSBjYXNlc1ssICFjKCJWMSIsICJVSUQiLCAiaXNvMiIsICJpc28zIiwgImNvZGUzIiwgIkNvbWJpbmVkS2V5IildCiMgc2VsZWN0cyB0aGUgbW9zdCByZWNlbnQgcG9wdWxhdGlvbiBlc3RpbWF0ZSAoMjAxOSkgYW5kIGRyb3BzIHdoYXQgd2lsbCBiZSByZWR1bmRhbnQgY29sdW1ucwp5ZWFyczIwMTkgPC0gZ3JlcCgiXihbXjAtOV0qKSR8MjAxOSIsIGNvbG5hbWVzKHBvcCkpCnBvcCA8LSBwb3BbLCAuLnllYXJzMjAxOV1bLCAhYygiU1VNTEVWIiwgIlNUQVRFIiwgIkNPVU5UWSIsICJTVE5BTUUiLCAiQ1RZTkFNRSIpXQojIGRyb3BzIHdoYXQgd2lsbCBiZSByZWR1bmRhbnQgY29sdW1ucwpkaXN0YW5jaW5nIDwtIGRpc3RhbmNpbmdbLCAhYygiT0JKRUNUSUQiLCAic3RhdGVfZmlwcyIsICJzdGF0ZV9uYW1lIiwgImNvdW50eV9maXBzIiwgImNvdW50eV9uYW1lIildCm1hc2tpbmcgPC0gbWFza2luZ1ssICFjKCJDT1VOVFlGUCIpXQpgYGAKCgpgYGB7cn0KIyBPdmVyYWxsIHRvdGFsCnRvdGFsQ2FzZXMgPC0gZ3JlcCgiWzAtOV17NH0iLCBjb2xuYW1lcyhjYXNlcykpCmNhc2VzWywgVG90YWxDYXNlcyA6PSByb3dTdW1zKGNhc2VzWywgLi50b3RhbENhc2VzXSwgbmEucm0gPSBUUlVFKV0KIyBUb3RhbCBiZWZvcmUgdmFjY2luYXRpb25zCiMgMjIgSmFudWFyeSAyMDIwIC0gMzAgTm92ZW1iZXIgMjAyMAojIGh0dHBzOi8vd3d3Lndhc2hpbmd0b25wb3N0LmNvbS9uYXRpb24vMjAyMC8xMi8xNC9maXJzdC1jb3ZpZC12YWNjaW5lcy1uZXcteW9yay8KdG90YWxDYXNlcyA8LSB0b3RhbENhc2VzWzE6MzE0XQpjYXNlc1ssIFRvdGFsQ2FzZXNWYWNjaW5lcyA6PSByb3dTdW1zKGNhc2VzWywgLi50b3RhbENhc2VzXSwgbmEucm0gPSBUUlVFKV0KaGVhZChjYXNlcykKYGBgCgoKYGBge3J9CmdyYWRlcyA8LSBjKCJBKyI9NC4zLCJBIj00LCJBLSI9My43LCJCKyI9My4zLCJCIj0zLCJCLSI9Mi43LCAiQysiPTIuMywiQyI9MiwiQy0iPTEuNywiRCsiPTEuMywiRCI9MSwiRC0iPSAwLjcsICJGIj0wKQoKZ2Z1bmMgPC0gZnVuY3Rpb24oeCkgewogIGdyYWRlc1thcy5jaGFyYWN0ZXIoeCldCn0KCmRpc3RhbmNpbmdbLCBjKCJncmFkZV90b3RhbCIsICJncmFkZV9kaXN0YW5jZSIsICJncmFkZV92aXNpdGF0aW9uIiwgImdyYWRlX2VuY291bnRlcnMiKSA6PQogICAgICAgICAgICAgLihnZnVuYyhncmFkZV90b3RhbCksIGdmdW5jKGdyYWRlX2Rpc3RhbmNlKSwgZ2Z1bmMoZ3JhZGVfdmlzaXRhdGlvbiksIGdmdW5jKGdyYWRlX2VuY291bnRlcnMpKV0KYGBgCgoKYGBge3J9CiMgc2V0IHRoZSBGSVBTIGNvbHVtbiBhcyB0aGUga2V5IGZvciBmYXN0ZXIgZGF0YS50YWJsZSBvcGVyYXRpb25zCiMgdGltZSBzZXJpZXMKc2V0a2V5KGNhc2VzLCBGSVBTKQpzZXRrZXkodmFjY2luZUNvdW50eSwgRklQUykKc2V0a2V5KGRpc3RhbmNpbmcsIEZJUFMpCiMgbm90IHRpbWUgc2VyaWVzCnNldGtleShtYXNraW5nLCBGSVBTKQpzZXRrZXkocG9wLCBGSVBTKQpgYGAKCgpgYGB7cn0KIyBwaXZvdF93aWRlcih2YWNjaW5lQ291bnR5LCBuYW1lc19mcm9tID0gRGF0ZSwgdmFsdWVzX2Zyb20gPSApCmBgYAoKCmBgYHtyfQpjc3YgPC0gY2FzZXNbcG9wLF1bZGlzdGFuY2luZyxdW21hc2tpbmcsXQp3cml0ZS5jc3YoY3N2LCAiRGF0YS9jYXNlc1BvcERpc3RhbmNpbmdNYXNraW5nLmNzdiIsIHJvdy5uYW1lcyA9IEZBTFNFKQpvcGVueGxzeDo6d3JpdGUueGxzeChjc3YsICJEYXRhL2Nhc2VzUG9wRGlzdGFuY2luZ01hc2tpbmcueGxzeCIsIG92ZXJ3cml0ZSA9IFRSVUUpCmRhdGVzIDwtIGdyZXAoIkRbMC05XXs0fV9bMC05XXsyfV9bMC05XXsyfSIsIGNvbG5hbWVzKGNzdikpCm9wZW54bHN4Ojp3cml0ZS54bHN4KGNzdlssICEuLmRhdGVzXSwgIkRhdGEvbm9EYXRlcy54bHN4Iiwgb3ZlcndyaXRlID0gVFJVRSkKY3N2CmBgYAoK